import threading
from threading import Lock,Thread
import time,os

# Placeholder stored in the output when a field could not be fetched or parsed.
_NOT_FOUND = '未获取到！'
# Workbook the company names are read from, and the row window that is scanned.
_SOURCE_WORKBOOK = r"济宁能源往来单位清单清洗-王巍.xlsx"
_FIRST_ROW = 16389
_LAST_ROW = 16500
# Workbook the scraped results are written to.
_OUTPUT_WORKBOOK = '企业信息11.xlsx'
# Seconds to wait for each HTTP request before giving up.
_REQUEST_TIMEOUT = 10

# NOTE(review): `requests`, `BeautifulSoup`, `load_workbook`, `DataFrame` and
# `headers` are used below but are not imported/defined in the visible part of
# this file - confirm they are provided elsewhere.


def _fetch_company_text(company):
    """Return the text of the ``#Cominfo`` element for *company*, or None.

    Searches qcc.com for the company name, follows the first result link
    (an ``<a class="title">`` element) and extracts the text of the first
    element matching ``#Cominfo`` on the detail page.

    Returns None when the search page has no result link or the detail page
    has no ``#Cominfo`` element.

    Raises:
        requests.RequestException: on timeout/connection failure, or when the
            detail page answers with an HTTP error status.
    """
    # Fetch the full search result page. The status of this response is
    # deliberately not checked (as in the original code).
    search = requests.get(
        'https://www.qcc.com/search?key={}'.format(company),
        headers=headers,
        timeout=_REQUEST_TIMEOUT,  # without a timeout a stalled server hangs the thread
    )
    search.encoding = 'utf-8'  # linux utf-8
    soup = BeautifulSoup(search.text, features="html.parser")
    links = soup.find_all('a', {'class': 'title'})
    if not links:
        return None
    href = links[0].get('href')
    print(href)

    # Fetch the full company detail page.
    details = requests.get(href, headers=headers, timeout=_REQUEST_TIMEOUT)
    details.raise_for_status()
    details.encoding = 'utf-8'  # linux utf-8
    details_soup = BeautifulSoup(details.text, features="html.parser")
    panels = details_soup.select('#Cominfo')
    return panels[0].text if panels else None


def _read_company_names(sheet_name, first_row=_FIRST_ROW, last_row=_LAST_ROW):
    """Read company names from column 2 of *sheet_name* in the source workbook.

    Rows ``first_row``..``last_row`` (inclusive) are scanned; reading stops at
    the first empty cell. The collected names are printed and returned.
    """
    companys = []
    book = load_workbook(filename=_SOURCE_WORKBOOK)
    try:
        sheet = book[sheet_name]
        for row_num in range(first_row, last_row + 1):
            company = sheet.cell(row=row_num, column=2).value
            if not company:
                break
            companys.append(company)
    finally:
        # Close the workbook even if reading a cell fails.
        book.close()
    print(companys)
    return companys


def _write_results(datas, sheet_name):
    """Write the column dict *datas* to the output workbook as *sheet_name*."""
    frame = DataFrame(datas)
    frame.to_excel(_OUTPUT_WORKBOOK, sheet_name=sheet_name)
    print('爬取成功！！')


def _line_after(text, label):
    """Return the second line of the text following *label*, or None.

    Only the span between the first occurrence of *label* and its next
    occurrence (or the end of *text*) is considered. None is returned when
    *label* is absent or that span contains no line break.
    """
    pieces = text.split(label)
    if len(pieces) < 2:
        return None
    lines = pieces[1].split('\n')
    if len(lines) < 2:
        return None
    return lines[1]


def _parse_company_info(text):
    """Split the scraped detail text into the individual output fields.

    Returns a dict keyed by output column name. Every field that cannot be
    located in *text* is set to the ``_NOT_FOUND`` placeholder.
    """
    def strip_spaces(value):
        return value.replace(' ', '')

    # Taxpayer id, falling back to the unified social credit code.
    tax_no = _line_after(text, '纳税人识别号')
    if tax_no is None:
        tax_no = _line_after(text, '统一社会信用代码')
    tax_no = _NOT_FOUND if tax_no is None else strip_spaces(tax_no)

    # Legal representative: the part of the line before the '关联' marker.
    legal_rep = _NOT_FOUND
    rep_line = _line_after(text, '法定代表人')
    if rep_line is not None and '关联' in rep_line:
        legal_rep = strip_spaces(rep_line.split('关联')[0])

    # Registration status: the part of the line before whichever trailing
    # marker is present, tried in this order.
    status = _NOT_FOUND
    status_line = _line_after(text, '登记状态')
    if status_line is not None:
        for marker in ('成立日期', '举办单位'):
            if marker in status_line:
                status = strip_spaces(status_line.split(marker)[0])
                break

    company_type = _line_after(text, '企业类型')
    company_type = _NOT_FOUND if company_type is None else strip_spaces(company_type)

    # Address: everything after the label up to either trailing section
    # marker, with spaces and line breaks removed.
    address = _NOT_FOUND
    addr_pieces = text.split('地址')
    if len(addr_pieces) > 1:
        address = (addr_pieces[1]
                   .split('附近企业')[0]
                   .split('宗旨和业务范围')[0]
                   .replace(' ', '')
                   .replace('\n', ''))

    authority = _line_after(text, '登记机关')
    authority = _NOT_FOUND if authority is None else strip_spaces(authority)

    return {
        '纳税人识别号': tax_no,
        '法定代表人': legal_rep,
        '登记状态': status,
        '企业类型': company_type,
        '企业地址': address,
        '登记机关': authority,
    }


def run(n=None):
    """Scrape company details for every name in the source sheet and save them.

    For each company name read from the source workbook the qcc.com detail
    text is fetched and parsed; the results are written to the output
    workbook. Companies whose page could not be fetched or parsed are
    recorded with the ``_NOT_FOUND`` placeholder in every field.

    Args:
        n: Unused. Kept for interface compatibility; it defaults to None so
            the function can also be started as a thread target without
            arguments.
    """
    for i in range(1, 2):
        sheet_name = 'Sheet' + str(i)
        print(sheet_name)
        companys = _read_company_names(sheet_name)

        datas = {
            '企业名称': [],
            '纳税人识别号': [],
            '法定代表人': [],
            '登记状态': [],
            '企业类型': [],
            '企业地址': [],
            '登记机关': [],
        }

        for company in companys:
            try:
                messages = _fetch_company_text(company)
            except requests.RequestException as exc:
                # One failed request must not discard the results already
                # collected; record the company as "not found" and carry on.
                print('请求失败------', exc)
                messages = None
            print(company)
            datas['企业名称'].append(company)
            if messages:
                print('-------------')
                fields = _parse_company_info(messages)
                for column, value in fields.items():
                    datas[column].append(value)
            else:
                print('未获取到------')
                for column in datas:
                    if column != '企业名称':
                        datas[column].append(_NOT_FOUND)

        print(datas)
        _write_results(datas, sheet_name)

if __name__ == '__main__':
    # `run` declares one positional parameter, so each thread must be given
    # an argument; with `args=()` the thread dies with a TypeError.
    # NOTE(review): both threads execute the same `run` and therefore write
    # the same output file - confirm that running two of them is intended.
    workers = [
        threading.Thread(target=run, args=(1,)),
        threading.Thread(target=run, args=(2,)),
    ]
    for worker in workers:
        worker.start()
    # Wait explicitly for both crawls to finish before the script exits.
    for worker in workers:
        worker.join()